import re

import scrapy

from ScrapyObject.spiders.utils.url_utils import *

'''
scrapy crawl ydag -o ydag.json
https://wyswspb.cfd/
'''

class YdagSpider(scrapy.Spider):
    """Crawl sequentially numbered play pages and yield one video item per usable page.

    Each page ``<prefix><website><suffix>play/id/<n>.html`` is requested for
    every ``n`` in ``range(first_id, last_id)``.  A page produces an item only
    when a video URL, a thumbnail URL, a breadcrumb tag and a title are all
    found; otherwise it is silently skipped.

    ``first_id`` and ``last_id`` may be overridden from the command line,
    e.g. ``scrapy crawl ydag -a first_id=2006000 -a last_id=2006100``.
    """

    name = "ydag"
    # URL scheme prefix
    prefix = 'https://'
    # Site name (middle part of the host name)
    website = 'wyswspb'
    # Domain suffix, including the trailing slash
    suffix = '.cfd/'
    # Derived from website/suffix so the domain is spelled in one place only.
    allowed_domains = [website + suffix.rstrip('/')]

    # Range of page ids to request: first_id inclusive, last_id exclusive.
    first_id = 2006000
    last_id = 3000000

    # Matches the raw ``thumbnailUrl...,`` fragment embedded in the page source.
    _thumbnail_re = re.compile(r'thumbnailUrl.*?,', re.IGNORECASE)

    def __init__(self, *args, **kwargs):
        """Initialise the base spider and the running item counter.

        Accepting and forwarding ``*args``/``**kwargs`` keeps the spider
        compatible with Scrapy's ``from_crawler`` and ``-a`` spider arguments.
        """
        super().__init__(*args, **kwargs)
        # Sequential id assigned to each yielded item.
        self.i = 0

    def start_requests(self):
        """Yield one request per page id in ``range(first_id, last_id)``."""
        # The base URL does not change between iterations, so build it once.
        base_url = self.prefix + self.website + self.suffix
        # int() because values passed with ``-a`` arrive as strings.
        for page_id in range(int(self.first_id), int(self.last_id)):
            yield scrapy.Request(
                split_joint(base_url, 'play/id/' + str(page_id) + '.html'),
                callback=self.parse,
            )

    def parse(self, response):
        """Extract the video, thumbnail, tag and title from a play page.

        Yields a single item built by ``get_video_item`` when all four pieces
        are present; yields nothing otherwise.
        """
        # Page content as a string
        content = get_data(response)
        video_url = get_video_url_three(content)
        p_url = self._thumbnail_re.findall(content)
        tags = response.xpath("//li[@class='breadcrumb-item']//a/@ title").extract()
        names = response.xpath("//h1[@class='text-center']/text()").extract()

        # Skip pages that are missing any required field.
        if not (video_url and p_url and tags and names):
            return

        self.i += 1
        # [16:-2] drops the first 16 and last 2 characters of the match --
        # presumably the leading `thumbnailUrl": "` and the trailing `",`;
        # verify against the actual page markup.
        thumbnail = p_url[0][16:-2]
        yield get_video_item(
            id=self.i,
            tags=tags[-1],
            url="",
            name=names[0],
            pUrl=thumbnail,
            vUrl=format_url_one(video_url[0]),
        )